{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import math\n",
    "import os\n",
    "import time\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Folder holding train.csv / test.csv. Set the RS_DATA_DIR environment variable\n",
    "# (e.g. D:\\workplace\\RSdata) to point elsewhere without editing the notebook;\n",
    "# the default is the path this notebook was originally run with.\n",
    "DATA_DIR = os.environ.get('RS_DATA_DIR', r\"C:\\Users\\CloudCross\\workspace\")\n",
    "train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Index the ratings by (user, item). Sorting the MultiIndex lets the per-user\n",
    "# label lookups in sim_pearson use fast slicing instead of scanning an\n",
    "# unsorted index.\n",
    "df = train_df.set_index(['uid','iid']).sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def sim_pearson(prefs,p1,p2):\n",
    "    \"\"\"Pearson correlation between the scores of users p1 and p2.\n",
    "\n",
    "    prefs is a DataFrame indexed by (uid, iid) with a 'score' column; only\n",
    "    the items rated by both users are compared.\n",
    "\n",
    "    Returns the correlation as a float, or 0.0 when the users share no items\n",
    "    or when either user's scores over the shared items have no variance.\n",
    "    Raises KeyError if p1 or p2 does not appear in prefs.\n",
    "    \"\"\"\n",
    "    # .ix was removed in pandas 1.0; .loc is the label-based replacement.\n",
    "    scores_1 = prefs.loc[p1]['score']\n",
    "    scores_2 = prefs.loc[p2]['score']\n",
    "    si = scores_1.index.intersection(scores_2.index)\n",
    "    n = len(si)\n",
    "    if n == 0:\n",
    "        # No shared items means no evidence of similarity. Returning 1 here\n",
    "        # (as before) made complete strangers count as perfect matches.\n",
    "        return 0.0\n",
    "\n",
    "    # Fetch the shared scores once as float arrays instead of doing a label\n",
    "    # lookup per item for every sum.\n",
    "    x = scores_1.loc[si].values.astype(float)\n",
    "    y = scores_2.loc[si].values.astype(float)\n",
    "\n",
    "    # Sums, sums of squares and sum of products over the shared items\n",
    "    sum1 = x.sum()\n",
    "    sum2 = y.sum()\n",
    "    sum1Sq = (x * x).sum()\n",
    "    sum2Sq = (y * y).sum()\n",
    "    pSum = (x * y).sum()\n",
    "\n",
    "    # Pearson score\n",
    "    num = pSum - (sum1 * sum2 / n)\n",
    "    var_product = (sum1Sq - sum1 ** 2 / n) * (sum2Sq - sum2 ** 2 / n)\n",
    "    # <= 0 also covers a tiny negative produced by floating-point rounding,\n",
    "    # which would otherwise make math.sqrt raise ValueError.\n",
    "    if var_product <= 0:\n",
    "        return 0.0\n",
    "    return float(num / math.sqrt(var_product))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20\n"
     ]
    }
   ],
   "source": [
    "# Walk through sim_pearson by hand for users 0 and 1.\n",
    "# (.ix was removed in pandas 1.0; .loc is the label-based replacement.)\n",
    "df_1 = df.loc[0]\n",
    "df_2 = df.loc[1]\n",
    "index1 = df_1.index\n",
    "index2 = df_2.index\n",
    "si = index1.intersection(index2)  # items rated by both users\n",
    "n = len(si)\n",
    "print(n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "53\n",
      "60\n"
     ]
    }
   ],
   "source": [
    "# Total score each of the two users gave to the shared items\n",
    "sum1, sum2 = (\n",
    "    sum(frame.loc[item, 'score'] for item in si)\n",
    "    for frame in (df_1, df_2)\n",
    ")\n",
    "print(sum1, sum2, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "163\n",
      "214\n"
     ]
    }
   ],
   "source": [
    "# Sum of squared scores over the shared items, per user\n",
    "sum1Sq, sum2Sq = (\n",
    "    sum(frame.loc[item, 'score'] ** 2 for item in si)\n",
    "    for frame in (df_1, df_2)\n",
    ")\n",
    "print(sum1Sq, sum2Sq, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "162\n",
      "3.0\n",
      "27.689348132449783\n",
      "0.108344912479\n"
     ]
    }
   ],
   "source": [
    "# Remaining steps of sim_pearson for the same pair, printing each intermediate.\n",
    "# (De-indented: the pasted function-body indentation only ran because IPython\n",
    "# strips it, and it breaks a plain-script export of the notebook.)\n",
    "pSum=sum([(df_1.loc[it,'score'])*(df_2.loc[it,'score']) for it in si])\n",
    "print(pSum)\n",
    "num = pSum - (sum1*sum2/n)\n",
    "den = math.sqrt((sum1Sq-(sum1**2)/n)*(sum2Sq-(sum2**2)/n))\n",
    "print(num)\n",
    "print(den)\n",
    "r = num/den\n",
    "print(r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.050145160963\n",
      "0.06244921684265137\n"
     ]
    }
   ],
   "source": [
    "# Time a single similarity evaluation. perf_counter is monotonic, so the\n",
    "# measured interval cannot be skewed by system clock adjustments.\n",
    "start = time.perf_counter()\n",
    "\n",
    "print(sim_pearson(df,2,1))\n",
    "\n",
    "print(time.perf_counter()-start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Predict p1's score for item iid1 as the similarity-weighted average of the\n",
    "# scores other users gave to iid1.\n",
    "def get_pre_score(prefs,df,p1,iid1,similarity=sim_pearson):\n",
    "    \"\"\"Predict the score user p1 would give to item iid1.\n",
    "\n",
    "    prefs      -- flat ratings DataFrame with 'uid', 'iid' and 'score' columns\n",
    "    df         -- ratings frame handed unchanged to `similarity`\n",
    "    p1         -- user to predict for\n",
    "    iid1       -- item to predict\n",
    "    similarity -- callable(df, user_a, user_b) returning a number\n",
    "\n",
    "    Returns the weighted mean of the scores for iid1 over the raters whose\n",
    "    similarity to p1 is positive, or NaN when there is no such rater.\n",
    "    \"\"\"\n",
    "    # .ix was removed in pandas 1.0; select rows and columns with .loc.\n",
    "    raters = prefs.loc[prefs['iid'] == iid1, ['uid', 'score']]\n",
    "    # Map over the uid column only: a row-wise apply would upcast uid to\n",
    "    # float whenever 'score' is a float column.\n",
    "    sims = raters['uid'].map(lambda other: similarity(df, p1, other))\n",
    "    # Use one mask for numerator and denominator so they always cover the\n",
    "    # same set of raters.\n",
    "    positive = sims > 0\n",
    "    total_sim = sum(sims[positive])\n",
    "    if not total_sim > 0:\n",
    "        # Nobody with positive similarity rated this item; the old code\n",
    "        # raised ZeroDivisionError here.\n",
    "        return float('nan')\n",
    "    weighted = sims[positive] * raters.loc[positive, 'score']\n",
    "    return sum(weighted) / total_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# (uid, iid) pairs to predict. RS_DATA_DIR overrides the folder; the default is\n",
    "# the path this notebook was originally run with.\n",
    "test_df = pd.read_csv(os.path.join(\n",
    "    os.environ.get('RS_DATA_DIR', r\"C:\\Users\\CloudCross\\workspace\"),\n",
    "    'test.csv'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Small sample of the test pairs for a quick trial run\n",
    "test = test_df.iloc[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_apply_standard\u001b[1;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[0;32m   4097\u001b[0m                     result = lib.reduce(values, func, axis=axis, dummy=dummy,\n\u001b[1;32m-> 4098\u001b[1;33m                                         labels=labels)\n\u001b[0m\u001b[0;32m   4099\u001b[0m                     \u001b[1;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mpandas\\src\\reduce.pyx\u001b[0m in \u001b[0;36mpandas.lib.reduce (pandas\\lib.c:43789)\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\src\\reduce.pyx\u001b[0m in \u001b[0;36mpandas.lib.Reducer.get_result (pandas\\lib.c:33921)\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32m<ipython-input-105-18a87e64e63d>\u001b[0m in \u001b[0;36mg\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mget_pre_score\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_df\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'uid'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'iid'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0msimilarity\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msim_pearson\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-98-71aaf28decb6>\u001b[0m in \u001b[0;36mget_pre_score\u001b[1;34m(prefs, df, p1, iid1, similarity)\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mget_pre_score\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprefs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mp1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0miid1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0msimilarity\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msim_pearson\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m     \u001b[0mpdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mprefs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mix\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mprefs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miid\u001b[0m\u001b[1;33m==\u001b[0m\u001b[0miid1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mix\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'uid'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'score'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m     75\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 76\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     77\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_tuple\u001b[1;34m(self, tup)\u001b[0m\n\u001b[0;32m    799\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 800\u001b[1;33m             \u001b[0mretval\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mretval\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    801\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m    995\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 996\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_iterable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    997\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_iterable\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m   1024\u001b[0m             \u001b[0minds\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1025\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minds\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1026\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mtake\u001b[1;34m(self, indices, axis, convert, is_copy)\u001b[0m\n\u001b[0;32m   1627\u001b[0m                                    \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_block_manager_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1628\u001b[1;33m                                    convert=True, verify=True)\n\u001b[0m\u001b[0;32m   1629\u001b[0m         \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mtake\u001b[1;34m(self, indexer, axis, verify, convert)\u001b[0m\n\u001b[0;32m   3645\u001b[0m         return self.reindex_indexer(new_axis=new_labels, indexer=indexer,\n\u001b[1;32m-> 3646\u001b[1;33m                                     axis=axis, allow_dups=True)\n\u001b[0m\u001b[0;32m   3647\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mreindex_indexer\u001b[1;34m(self, new_axis, indexer, axis, fill_value, allow_dups, copy)\u001b[0m\n\u001b[0;32m   3535\u001b[0m         \u001b[0mnew_axes\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3536\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_blocks\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_axes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3537\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, blocks, axes, do_integrity_check, fastpath)\u001b[0m\n\u001b[0;32m   2540\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2541\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_rebuild_blknos_and_blklocs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2542\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36m_rebuild_blknos_and_blklocs\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m   2628\u001b[0m             \u001b[0mnew_blknos\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mrl\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mblkno\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2629\u001b[1;33m             \u001b[0mnew_blklocs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mrl\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2630\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: ",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-105-18a87e64e63d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[0mon\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mg\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      6\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mon\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, args, **kwds)\u001b[0m\n\u001b[0;32m   4040\u001b[0m                     \u001b[1;32mif\u001b[0m \u001b[0mreduce\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4041\u001b[0m                         \u001b[0mreduce\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4042\u001b[1;33m                     \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreduce\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   4043\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4044\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_apply_standard\u001b[1;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[0;32m   4098\u001b[0m                                         labels=labels)\n\u001b[0;32m   4099\u001b[0m                     \u001b[1;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4100\u001b[1;33m                 \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   4101\u001b[0m                     \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4102\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ],
     "output_type": "error"
    }
   ],
   "source": [
    "def g(x):\n",
    "    \"\"\"Predict the score for one test row (reads x['uid'] and x['iid']).\"\"\"\n",
    "    return get_pre_score(train_df,df,x.loc['uid'],x.loc['iid'],similarity=sim_pearson)\n",
    "\n",
    "# Time the prediction over the sample. perf_counter is monotonic, so the\n",
    "# measured interval cannot be skewed by system clock adjustments.\n",
    "start = time.perf_counter()\n",
    "print(test.apply(g,axis=1))\n",
    "\n",
    "print(time.perf_counter()-start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# All (uid, score) ratings of item 12960.\n",
    "# (.ix was removed in pandas 1.0; .loc selects rows and columns in one step.)\n",
    "pdf = train_df.loc[train_df.iid==12960, ['uid','score']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1092392"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of cells in the test frame (rows x columns). The saved source was the\n",
    "# incomplete expression 'test_df.', which is a SyntaxError on re-run.\n",
    "test_df.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def f(x):\n",
    "    \"\"\"Similarity between user 0 and the user named in row x (reads x['uid']).\"\"\"\n",
    "    other_uid = x.loc['uid']\n",
    "    return sim_pearson(df, 0, other_uid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Similarity of user 0 to every rater in pdf, one value per row\n",
    "sims = pdf.apply(f, axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.30256767451932998"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check a single pair: similarity between users 0 and 38\n",
    "sim_pearson(df,0,38)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26880       0.302568\n",
       "31156       0.094319\n",
       "448385      0.277038\n",
       "1571379     0.307249\n",
       "1976170     0.407416\n",
       "2141366     0.311439\n",
       "2144229     0.067664\n",
       "2486393     0.495077\n",
       "2829838     0.145727\n",
       "2862791     0.003549\n",
       "3002274    -0.077809\n",
       "3157460     0.347711\n",
       "3284724     0.181223\n",
       "3705484    -0.101781\n",
       "3713215     0.302125\n",
       "4176206     0.190508\n",
       "4387163     0.009557\n",
       "4536446    -0.163359\n",
       "4541250     0.219352\n",
       "4766050     0.095131\n",
       "4911574    -0.049019\n",
       "4926941     0.189949\n",
       "5148867     0.152788\n",
       "5153936     0.114879\n",
       "5398504     0.135745\n",
       "5511536     0.341823\n",
       "5734197     0.113365\n",
       "5830053    -0.070239\n",
       "5881614    -0.011432\n",
       "6109689     0.205027\n",
       "              ...   \n",
       "32302927    0.173570\n",
       "32326350    0.136125\n",
       "32344164    0.058168\n",
       "32359917    0.479249\n",
       "32433896    0.088443\n",
       "32500106    0.087791\n",
       "32503085    0.067713\n",
       "32551787    0.554918\n",
       "32616589   -0.577350\n",
       "32702191    0.216698\n",
       "32728986    0.000000\n",
       "32739987    0.359715\n",
       "32756042    0.156590\n",
       "32775952    0.381401\n",
       "32785707    0.137672\n",
       "32792832    0.255719\n",
       "32827920    0.074658\n",
       "32839641    0.140253\n",
       "32850907    0.015413\n",
       "32856215    0.227888\n",
       "32865342    0.068549\n",
       "32932706    0.453146\n",
       "32949461    0.327805\n",
       "32953290    0.056354\n",
       "32969253    0.220994\n",
       "32970056   -0.256931\n",
       "33008427    0.280316\n",
       "33073335    0.128574\n",
       "33129132    0.500000\n",
       "33148491    0.573944\n",
       "dtype: float64"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the similarity computed for each row of pdf\n",
    "sims"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Weight each rater's score by that rater's similarity\n",
    "ser1 = sims.mul(pdf['score'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26880       1.210271\n",
       "31156       0.282957\n",
       "448385      0.831115\n",
       "1571379     1.536247\n",
       "1976170     1.629662\n",
       "2141366     1.245756\n",
       "2144229     0.202992\n",
       "2486393     2.475383\n",
       "2829838     0.582907\n",
       "2862791     0.017745\n",
       "3002274    -0.311234\n",
       "3157460     1.738557\n",
       "3284724     0.724894\n",
       "3705484    -0.407124\n",
       "3713215     1.208499\n",
       "4176206     0.952540\n",
       "4387163     0.038229\n",
       "4536446    -0.653437\n",
       "4541250     0.877408\n",
       "4766050     0.285393\n",
       "4911574    -0.245094\n",
       "4926941     0.569846\n",
       "5148867     0.763942\n",
       "5153936     0.574397\n",
       "5398504     0.678723\n",
       "5511536     1.025469\n",
       "5734197     0.453461\n",
       "5830053    -0.351195\n",
       "5881614    -0.057161\n",
       "6109689     1.025137\n",
       "              ...   \n",
       "32302927    0.694281\n",
       "32326350    0.544499\n",
       "32344164    0.290838\n",
       "32359917    2.396243\n",
       "32433896    0.353771\n",
       "32500106    0.438955\n",
       "32503085    0.338566\n",
       "32551787    2.774590\n",
       "32616589   -2.886751\n",
       "32702191    0.866793\n",
       "32728986    0.000000\n",
       "32739987    1.079145\n",
       "32756042    0.782951\n",
       "32775952    1.907003\n",
       "32785707    0.550688\n",
       "32792832    0.767158\n",
       "32827920    0.298631\n",
       "32839641    0.701263\n",
       "32850907    0.061652\n",
       "32856215    1.139441\n",
       "32865342    0.342743\n",
       "32932706    1.812584\n",
       "32949461    0.655610\n",
       "32953290    0.112708\n",
       "32969253    1.104969\n",
       "32970056   -0.513862\n",
       "33008427    0.840948\n",
       "33073335    0.514294\n",
       "33129132    2.500000\n",
       "33148491    2.869720\n",
       "dtype: float64"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the similarity-weighted scores\n",
    "ser1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.3176938637644104"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Weighted-average prediction using only the positive entries\n",
    "positive_weighted = ser1[ser1 > 0]\n",
    "positive_sims = sims[sims > 0]\n",
    "sum(positive_weighted) / sum(positive_sims)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# First 50 test pairs for a slightly larger trial run\n",
    "test = test_df.iloc[:50]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>iid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>12960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>12726</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>11463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>10739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>3441</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>13291</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>2814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>2857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2</td>\n",
       "      <td>12860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2</td>\n",
       "      <td>11091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2</td>\n",
       "      <td>13057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>3</td>\n",
       "      <td>8992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>3</td>\n",
       "      <td>11082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>3</td>\n",
       "      <td>2665</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>3</td>\n",
       "      <td>12570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>3</td>\n",
       "      <td>13410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>3</td>\n",
       "      <td>12714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>3</td>\n",
       "      <td>14649</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>3</td>\n",
       "      <td>2635</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>4</td>\n",
       "      <td>14339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>4</td>\n",
       "      <td>13000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>5</td>\n",
       "      <td>1326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>5</td>\n",
       "      <td>2308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>5</td>\n",
       "      <td>1934</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5</td>\n",
       "      <td>2405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>5</td>\n",
       "      <td>13509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>5</td>\n",
       "      <td>12362</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>5</td>\n",
       "      <td>7636</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>5</td>\n",
       "      <td>5155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546166</th>\n",
       "      <td>223267</td>\n",
       "      <td>12181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546167</th>\n",
       "      <td>223267</td>\n",
       "      <td>3569</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546168</th>\n",
       "      <td>223277</td>\n",
       "      <td>11865</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546169</th>\n",
       "      <td>223686</td>\n",
       "      <td>12983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546170</th>\n",
       "      <td>223842</td>\n",
       "      <td>1801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546171</th>\n",
       "      <td>223842</td>\n",
       "      <td>1418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546172</th>\n",
       "      <td>223842</td>\n",
       "      <td>146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546173</th>\n",
       "      <td>223842</td>\n",
       "      <td>3033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546174</th>\n",
       "      <td>223842</td>\n",
       "      <td>282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546175</th>\n",
       "      <td>223842</td>\n",
       "      <td>2883</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546176</th>\n",
       "      <td>223842</td>\n",
       "      <td>2161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546177</th>\n",
       "      <td>223842</td>\n",
       "      <td>10018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546178</th>\n",
       "      <td>223842</td>\n",
       "      <td>11218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546179</th>\n",
       "      <td>223842</td>\n",
       "      <td>4753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546180</th>\n",
       "      <td>223842</td>\n",
       "      <td>9687</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546181</th>\n",
       "      <td>223842</td>\n",
       "      <td>1321</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546182</th>\n",
       "      <td>223842</td>\n",
       "      <td>601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546183</th>\n",
       "      <td>223842</td>\n",
       "      <td>1340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546184</th>\n",
       "      <td>223842</td>\n",
       "      <td>1335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546185</th>\n",
       "      <td>223842</td>\n",
       "      <td>3428</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546186</th>\n",
       "      <td>223842</td>\n",
       "      <td>6155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546187</th>\n",
       "      <td>223842</td>\n",
       "      <td>4664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546188</th>\n",
       "      <td>223842</td>\n",
       "      <td>2918</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546189</th>\n",
       "      <td>223842</td>\n",
       "      <td>6607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546190</th>\n",
       "      <td>223842</td>\n",
       "      <td>3576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546191</th>\n",
       "      <td>223842</td>\n",
       "      <td>7033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546192</th>\n",
       "      <td>223842</td>\n",
       "      <td>2391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546193</th>\n",
       "      <td>223842</td>\n",
       "      <td>2625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546194</th>\n",
       "      <td>223842</td>\n",
       "      <td>6477</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546195</th>\n",
       "      <td>223969</td>\n",
       "      <td>9758</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>546196 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           uid    iid\n",
       "0            0  12960\n",
       "1            1  12726\n",
       "2            1  11463\n",
       "3            1  10739\n",
       "4            1   3441\n",
       "5            1    301\n",
       "6            1  13291\n",
       "7            1   2814\n",
       "8            1   2857\n",
       "9            2  12860\n",
       "10           2  11091\n",
       "11           2  13057\n",
       "12           3   8992\n",
       "13           3  11082\n",
       "14           3   2665\n",
       "15           3  12570\n",
       "16           3  13410\n",
       "17           3  12714\n",
       "18           3  14649\n",
       "19           3   2635\n",
       "20           4  14339\n",
       "21           4  13000\n",
       "22           5   1326\n",
       "23           5   2308\n",
       "24           5   1934\n",
       "25           5   2405\n",
       "26           5  13509\n",
       "27           5  12362\n",
       "28           5   7636\n",
       "29           5   5155\n",
       "...        ...    ...\n",
       "546166  223267  12181\n",
       "546167  223267   3569\n",
       "546168  223277  11865\n",
       "546169  223686  12983\n",
       "546170  223842   1801\n",
       "546171  223842   1418\n",
       "546172  223842    146\n",
       "546173  223842   3033\n",
       "546174  223842    282\n",
       "546175  223842   2883\n",
       "546176  223842   2161\n",
       "546177  223842  10018\n",
       "546178  223842  11218\n",
       "546179  223842   4753\n",
       "546180  223842   9687\n",
       "546181  223842   1321\n",
       "546182  223842    601\n",
       "546183  223842   1340\n",
       "546184  223842   1335\n",
       "546185  223842   3428\n",
       "546186  223842   6155\n",
       "546187  223842   4664\n",
       "546188  223842   2918\n",
       "546189  223842   6607\n",
       "546190  223842   3576\n",
       "546191  223842   7033\n",
       "546192  223842   2391\n",
       "546193  223842   2625\n",
       "546194  223842   6477\n",
       "546195  223969   9758\n",
       "\n",
       "[546196 rows x 2 columns]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\pandas\\indexes\\base.py:2397: RuntimeWarning: unorderable types: str() < int(), sort order is undefined for incomparable objects\n",
      "  return this.join(other, how=how, return_indexers=return_indexers)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check get_pre_score on a single pair, with sim_pearson as the similarity function.\n",
    "# NOTE(review): 1 and 12726 are presumably (uid, iid) -- confirm against get_pre_score.\n",
    "# NOTE(review): the saved run emitted a pandas RuntimeWarning about unorderable str/int\n",
    "# values while joining indexes; check that the index dtypes are consistent.\n",
    "get_pre_score(train_df,df,1,12726,similarity=sim_pearson)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sss = test_df.apply(lambda x:x.loc['uid']+1,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0              1\n",
       "1              2\n",
       "2              2\n",
       "3              2\n",
       "4              2\n",
       "5              2\n",
       "6              2\n",
       "7              2\n",
       "8              2\n",
       "9              3\n",
       "10             3\n",
       "11             3\n",
       "12             4\n",
       "13             4\n",
       "14             4\n",
       "15             4\n",
       "16             4\n",
       "17             4\n",
       "18             4\n",
       "19             4\n",
       "20             5\n",
       "21             5\n",
       "22             6\n",
       "23             6\n",
       "24             6\n",
       "25             6\n",
       "26             6\n",
       "27             6\n",
       "28             6\n",
       "29             6\n",
       "           ...  \n",
       "546166    223268\n",
       "546167    223268\n",
       "546168    223278\n",
       "546169    223687\n",
       "546170    223843\n",
       "546171    223843\n",
       "546172    223843\n",
       "546173    223843\n",
       "546174    223843\n",
       "546175    223843\n",
       "546176    223843\n",
       "546177    223843\n",
       "546178    223843\n",
       "546179    223843\n",
       "546180    223843\n",
       "546181    223843\n",
       "546182    223843\n",
       "546183    223843\n",
       "546184    223843\n",
       "546185    223843\n",
       "546186    223843\n",
       "546187    223843\n",
       "546188    223843\n",
       "546189    223843\n",
       "546190    223843\n",
       "546191    223843\n",
       "546192    223843\n",
       "546193    223843\n",
       "546194    223843\n",
       "546195    223970\n",
       "dtype: int64"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display sss, the Series of uid values incremented by 1.\n",
    "sss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0           0.108345\n",
       "22417       0.506746\n",
       "75740       0.070244\n",
       "133545     -0.140171\n",
       "189704      0.098823\n",
       "264330      0.682060\n",
       "399412      0.029248\n",
       "446712      0.779423\n",
       "455401      0.348964\n",
       "532401      0.152250\n",
       "544950      0.116187\n",
       "683066      0.198555\n",
       "824862      0.325302\n",
       "829146      0.445532\n",
       "889421      0.092948\n",
       "937099      0.081804\n",
       "1228843     0.345288\n",
       "1524562     0.239224\n",
       "1531616    -0.534918\n",
       "1610947     0.866025\n",
       "1613316     0.000000\n",
       "1945878     0.237484\n",
       "1975673    -0.463928\n",
       "2508072     0.044040\n",
       "2573884    -0.066227\n",
       "2626097    -0.137361\n",
       "2847780     0.226062\n",
       "2925583     0.000000\n",
       "2978153     0.000000\n",
       "2978210    -0.012723\n",
       "              ...   \n",
       "23357655    0.447024\n",
       "23643563    0.047023\n",
       "23682105    0.290873\n",
       "23837452    0.493844\n",
       "24007442    0.234268\n",
       "24029835    0.035584\n",
       "24132610    0.094046\n",
       "24231943    0.644008\n",
       "24442898    0.182779\n",
       "25088554    0.000000\n",
       "25189345    0.040870\n",
       "25298375    0.225594\n",
       "25299621    0.000000\n",
       "25299713   -0.145003\n",
       "26268652   -0.123613\n",
       "26295379    0.280610\n",
       "26400233   -0.107417\n",
       "26786383    0.254387\n",
       "27409202    0.092698\n",
       "27778636    1.000000\n",
       "28119562   -0.073545\n",
       "29205330    0.000000\n",
       "29381192    0.043747\n",
       "29428720    0.446368\n",
       "29523303    0.128565\n",
       "29922569    0.071664\n",
       "30211590    0.000000\n",
       "31203412   -0.078102\n",
       "31297053   -0.015019\n",
       "32051787    0.338003\n",
       "Name: uid, dtype: float64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pdf = train_df.ix[train_df.iid==0,:].ix[:,['uid','score']]\n",
    "def f(x):\n",
    "    return sim_pearson(df,1,x)\n",
    "pdf['uid'].apply(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22417</th>\n",
       "      <td>33</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75740</th>\n",
       "      <td>136</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133545</th>\n",
       "      <td>232</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189704</th>\n",
       "      <td>343</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>264330</th>\n",
       "      <td>494</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399412</th>\n",
       "      <td>881</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>446712</th>\n",
       "      <td>998</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>455401</th>\n",
       "      <td>1021</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>532401</th>\n",
       "      <td>1189</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544950</th>\n",
       "      <td>1224</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>683066</th>\n",
       "      <td>1615</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>824862</th>\n",
       "      <td>1940</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>829146</th>\n",
       "      <td>1955</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>889421</th>\n",
       "      <td>2136</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>937099</th>\n",
       "      <td>2287</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1228843</th>\n",
       "      <td>3246</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1524562</th>\n",
       "      <td>4399</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1531616</th>\n",
       "      <td>4431</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1610947</th>\n",
       "      <td>4863</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1613316</th>\n",
       "      <td>4878</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1945878</th>\n",
       "      <td>5887</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1975673</th>\n",
       "      <td>6029</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2508072</th>\n",
       "      <td>8290</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2573884</th>\n",
       "      <td>8448</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2626097</th>\n",
       "      <td>8665</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2847780</th>\n",
       "      <td>9621</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2925583</th>\n",
       "      <td>9990</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2978153</th>\n",
       "      <td>10333</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2978210</th>\n",
       "      <td>10334</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23357655</th>\n",
       "      <td>120791</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23643563</th>\n",
       "      <td>124243</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23682105</th>\n",
       "      <td>124795</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23837452</th>\n",
       "      <td>125200</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24007442</th>\n",
       "      <td>125702</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24029835</th>\n",
       "      <td>125766</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24132610</th>\n",
       "      <td>126096</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24231943</th>\n",
       "      <td>126547</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24442898</th>\n",
       "      <td>127659</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25088554</th>\n",
       "      <td>134381</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25189345</th>\n",
       "      <td>135947</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25298375</th>\n",
       "      <td>137714</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25299621</th>\n",
       "      <td>137715</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25299713</th>\n",
       "      <td>137716</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26268652</th>\n",
       "      <td>142794</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26295379</th>\n",
       "      <td>143083</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26400233</th>\n",
       "      <td>144515</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26786383</th>\n",
       "      <td>149499</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27409202</th>\n",
       "      <td>151934</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27778636</th>\n",
       "      <td>155834</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28119562</th>\n",
       "      <td>161608</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29205330</th>\n",
       "      <td>170325</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29381192</th>\n",
       "      <td>172578</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29428720</th>\n",
       "      <td>172828</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29523303</th>\n",
       "      <td>173340</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29922569</th>\n",
       "      <td>175268</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30211590</th>\n",
       "      <td>179108</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31203412</th>\n",
       "      <td>190148</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31297053</th>\n",
       "      <td>192920</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32051787</th>\n",
       "      <td>202796</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>191 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             uid  score\n",
       "0              0      2\n",
       "22417         33      3\n",
       "75740        136      3\n",
       "133545       232      1\n",
       "189704       343      4\n",
       "264330       494      3\n",
       "399412       881      2\n",
       "446712       998      3\n",
       "455401      1021      4\n",
       "532401      1189      3\n",
       "544950      1224      3\n",
       "683066      1615      2\n",
       "824862      1940      1\n",
       "829146      1955      4\n",
       "889421      2136      3\n",
       "937099      2287      2\n",
       "1228843     3246      1\n",
       "1524562     4399      5\n",
       "1531616     4431      5\n",
       "1610947     4863      3\n",
       "1613316     4878      4\n",
       "1945878     5887      3\n",
       "1975673     6029      4\n",
       "2508072     8290      4\n",
       "2573884     8448      4\n",
       "2626097     8665      2\n",
       "2847780     9621      5\n",
       "2925583     9990      3\n",
       "2978153    10333      4\n",
       "2978210    10334      5\n",
       "...          ...    ...\n",
       "23357655  120791      5\n",
       "23643563  124243      4\n",
       "23682105  124795      5\n",
       "23837452  125200      3\n",
       "24007442  125702      3\n",
       "24029835  125766      3\n",
       "24132610  126096      3\n",
       "24231943  126547      2\n",
       "24442898  127659      5\n",
       "25088554  134381      1\n",
       "25189345  135947      5\n",
       "25298375  137714      4\n",
       "25299621  137715      2\n",
       "25299713  137716      3\n",
       "26268652  142794      2\n",
       "26295379  143083      4\n",
       "26400233  144515      4\n",
       "26786383  149499      2\n",
       "27409202  151934      3\n",
       "27778636  155834      3\n",
       "28119562  161608      4\n",
       "29205330  170325      5\n",
       "29381192  172578      3\n",
       "29428720  172828      5\n",
       "29523303  173340      3\n",
       "29922569  175268      3\n",
       "30211590  179108      5\n",
       "31203412  190148      5\n",
       "31297053  192920      3\n",
       "32051787  202796      4\n",
       "\n",
       "[191 rows x 2 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display pdf, the uid/score pairs of the users who rated item 0.\n",
    "pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0           0.216690\n",
       "22417       1.520238\n",
       "75740       0.210732\n",
       "189704      0.395292\n",
       "264330      2.046179\n",
       "399412      0.058497\n",
       "446712      2.338269\n",
       "455401      1.395855\n",
       "532401      0.456751\n",
       "544950      0.348562\n",
       "683066      0.397111\n",
       "824862      0.325302\n",
       "829146      1.782129\n",
       "889421      0.278844\n",
       "937099      0.163609\n",
       "1228843     0.345288\n",
       "1524562     1.196121\n",
       "1610947     2.598076\n",
       "1945878     0.712452\n",
       "2508072     0.176159\n",
       "2847780     1.130311\n",
       "2979308     0.773871\n",
       "3059429     0.491015\n",
       "3161776     0.468796\n",
       "3224984     0.481869\n",
       "3276621     1.043522\n",
       "3289615     0.291895\n",
       "3330626     0.668419\n",
       "3842239     0.973904\n",
       "4092920     0.382864\n",
       "              ...   \n",
       "19055535    3.000000\n",
       "19479426    2.337051\n",
       "19507396    1.967521\n",
       "20535724    0.967721\n",
       "21219867    1.036038\n",
       "21220272    4.000000\n",
       "21712465    0.074953\n",
       "21836398    4.000000\n",
       "22597580    1.171080\n",
       "22854281    0.630237\n",
       "23357655    2.235118\n",
       "23643563    0.188092\n",
       "23682105    1.454365\n",
       "23837452    1.481532\n",
       "24007442    0.702804\n",
       "24029835    0.106753\n",
       "24132610    0.282138\n",
       "24231943    1.288016\n",
       "24442898    0.913894\n",
       "25189345    0.204348\n",
       "25298375    0.902377\n",
       "26295379    1.122441\n",
       "26786383    0.508774\n",
       "27409202    0.278095\n",
       "27778636    3.000000\n",
       "29381192    0.131241\n",
       "29428720    2.231842\n",
       "29523303    0.385695\n",
       "29922569    0.214992\n",
       "32051787    1.352012\n",
       "dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pdf['uid'].apply(f)*pdf['score']\n",
    "s[s>0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "129.26413110753057"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(s[s>0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10834491247860874"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pearson similarity between users 0 and 1, computed on the (uid, iid)-indexed df.\n",
    "sim_pearson(df,0,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df2 = train_df.set_index(['iid','uid'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>iid</th>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>164</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>301</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>313</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>314</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>322</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>353</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>355</th>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>356</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>434</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>451</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>488</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>501</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>555</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>561</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>568</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>588</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>620</th>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1346</th>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1353</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1386</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1399</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1415</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>561</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1423</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>467</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1454</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1458</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>1267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1462</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1470</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1498</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1543</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1551</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1586</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1603</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1618</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1623</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1629</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1674</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1697</th>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1743</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1767</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1782</th>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1801</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>585</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1816</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1823</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1824</th>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1829</th>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1874</th>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          score  time\n",
       "iid  uid             \n",
       "0    0        2    19\n",
       "8    0        4   273\n",
       "13   0        1   587\n",
       "18   0        3    15\n",
       "34   0        3    17\n",
       "38   0        4    37\n",
       "44   0        5   245\n",
       "59   0        2   308\n",
       "115  0        5   177\n",
       "124  0        1    37\n",
       "164  0        3    43\n",
       "170  0        3   177\n",
       "196  0        3   598\n",
       "301  0        3   308\n",
       "313  0        4   417\n",
       "314  0        2   273\n",
       "322  0        4   417\n",
       "353  0        4    37\n",
       "355  0        1    89\n",
       "356  0        2   177\n",
       "410  0        3   177\n",
       "434  0        3   598\n",
       "442  0        4   245\n",
       "451  0        2    48\n",
       "488  0        4    54\n",
       "501  0        4   723\n",
       "555  0        2   561\n",
       "568  0        4   177\n",
       "588  0        3    37\n",
       "620  0        1    17\n",
       "...         ...   ...\n",
       "1346 0        1   245\n",
       "1353 0        2    37\n",
       "1386 0        3   273\n",
       "1399 0        2    37\n",
       "1415 0        2   561\n",
       "1423 0        3   467\n",
       "1454 0        3   177\n",
       "1458 0        2  1267\n",
       "1462 0        3   245\n",
       "1470 0        2    48\n",
       "1498 0        4    37\n",
       "1543 0        4    37\n",
       "1551 0        3   835\n",
       "1586 0        4   245\n",
       "1603 0        4    43\n",
       "1618 0        2   273\n",
       "1623 0        3    89\n",
       "1629 0        4   835\n",
       "1674 0        3    17\n",
       "1697 0        5    89\n",
       "1743 0        4    43\n",
       "1767 0        2   835\n",
       "1782 0        1    43\n",
       "1801 0        3   585\n",
       "1806 0        4    37\n",
       "1816 0        3    37\n",
       "1823 0        2    44\n",
       "1824 0        5   273\n",
       "1829 0        4   587\n",
       "1874 0        3    48\n",
       "\n",
       "[100 rows x 2 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first 100 rows of df2.\n",
    "df2.head(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "280\n",
      "20\n",
      "20\n"
     ]
    }
   ],
   "source": [
    "# Inspect the index labels shared by first-level keys 0 and 1 of df.\n",
    "# .loc replaces the removed .ix indexer; print() works on Python 2 and 3.\n",
    "df1 = df.loc[0]\n",
    "index1 = df1.index\n",
    "print(index1.size)  # number of rows under key 0\n",
    "df2 = df.loc[1]\n",
    "index2 = df2.index\n",
    "te = index1.intersection(index2)\n",
    "print(te.size)  # number of labels present under both keys\n",
    "# Squared 'score' values of key 0, restricted to the shared labels.\n",
    "l1 = [df1.loc[it,'score']**2 for it in te]\n",
    "sumte = sum(l1)\n",
    "print(len(l1))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.291556023595\n",
      "0.17182374000549316\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "# Record the start time so the elapsed time of one sim_pearson call can be printed.\n",
    "on = time.time()\n",
    "\n",
    "# Pearson score for keys 0 and 895 of df (uids, assuming df is still indexed by uid/iid).\n",
    "print(sim_pearson(df,0,895))\n",
    "\n",
    "# Elapsed seconds since `on`; this includes the time spent in the print above.\n",
    "print(time.time()-on)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>iid</th>\n",
       "      <th>score</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>34</td>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0</td>\n",
       "      <td>44</td>\n",
       "      <td>5</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0</td>\n",
       "      <td>59</td>\n",
       "      <td>2</td>\n",
       "      <td>308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0</td>\n",
       "      <td>115</td>\n",
       "      <td>5</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0</td>\n",
       "      <td>124</td>\n",
       "      <td>1</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   uid  iid  score  time\n",
       "0    0    0      2    19\n",
       "1    0    8      4   273\n",
       "2    0   13      1   587\n",
       "3    0   18      3    15\n",
       "4    0   34      3    17\n",
       "5    0   38      4    37\n",
       "6    0   44      5   245\n",
       "7    0   59      2   308\n",
       "8    0  115      5   177\n",
       "9    0  124      1    37"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first 10 rows of the training table loaded from train.csv.\n",
    "train_df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th>iid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"30\" valign=\"top\">0</th>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1</td>\n",
       "      <td>587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>5</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>2</td>\n",
       "      <td>308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <td>5</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>1</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>164</th>\n",
       "      <td>3</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>3</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>3</td>\n",
       "      <td>598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>301</th>\n",
       "      <td>3</td>\n",
       "      <td>308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>313</th>\n",
       "      <td>4</td>\n",
       "      <td>417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>314</th>\n",
       "      <td>2</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>322</th>\n",
       "      <td>4</td>\n",
       "      <td>417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>353</th>\n",
       "      <td>4</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>355</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>356</th>\n",
       "      <td>2</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410</th>\n",
       "      <td>3</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>434</th>\n",
       "      <td>3</td>\n",
       "      <td>598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442</th>\n",
       "      <td>4</td>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>451</th>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>488</th>\n",
       "      <td>4</td>\n",
       "      <td>54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>501</th>\n",
       "      <td>4</td>\n",
       "      <td>723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>555</th>\n",
       "      <td>2</td>\n",
       "      <td>561</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>568</th>\n",
       "      <td>4</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>588</th>\n",
       "      <td>3</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>620</th>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">223686</th>\n",
       "      <th>4851</th>\n",
       "      <td>4</td>\n",
       "      <td>1339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4945</th>\n",
       "      <td>4</td>\n",
       "      <td>1339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5358</th>\n",
       "      <td>5</td>\n",
       "      <td>1339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6849</th>\n",
       "      <td>3</td>\n",
       "      <td>1339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8521</th>\n",
       "      <td>2</td>\n",
       "      <td>1339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"10\" valign=\"top\">223842</th>\n",
       "      <th>1685</th>\n",
       "      <td>1</td>\n",
       "      <td>1339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2336</th>\n",
       "      <td>4</td>\n",
       "      <td>1335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3135</th>\n",
       "      <td>3</td>\n",
       "      <td>1343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4886</th>\n",
       "      <td>4</td>\n",
       "      <td>1343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5429</th>\n",
       "      <td>3</td>\n",
       "      <td>1343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5643</th>\n",
       "      <td>5</td>\n",
       "      <td>1343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6441</th>\n",
       "      <td>4</td>\n",
       "      <td>1335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8383</th>\n",
       "      <td>4</td>\n",
       "      <td>1343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8400</th>\n",
       "      <td>3</td>\n",
       "      <td>1343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9650</th>\n",
       "      <td>5</td>\n",
       "      <td>1343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"15\" valign=\"top\">223969</th>\n",
       "      <th>5358</th>\n",
       "      <td>3</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6491</th>\n",
       "      <td>4</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8677</th>\n",
       "      <td>4</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9364</th>\n",
       "      <td>3</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9902</th>\n",
       "      <td>4</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11153</th>\n",
       "      <td>3</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11205</th>\n",
       "      <td>3</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11559</th>\n",
       "      <td>3</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12355</th>\n",
       "      <td>4</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12681</th>\n",
       "      <td>2</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12729</th>\n",
       "      <td>2</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12983</th>\n",
       "      <td>1</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13000</th>\n",
       "      <td>4</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13291</th>\n",
       "      <td>3</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13531</th>\n",
       "      <td>4</td>\n",
       "      <td>1346</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>33177270 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              score  time\n",
       "uid    iid               \n",
       "0      0          2    19\n",
       "       8          4   273\n",
       "       13         1   587\n",
       "       18         3    15\n",
       "       34         3    17\n",
       "       38         4    37\n",
       "       44         5   245\n",
       "       59         2   308\n",
       "       115        5   177\n",
       "       124        1    37\n",
       "       164        3    43\n",
       "       170        3   177\n",
       "       196        3   598\n",
       "       301        3   308\n",
       "       313        4   417\n",
       "       314        2   273\n",
       "       322        4   417\n",
       "       353        4    37\n",
       "       355        1    89\n",
       "       356        2   177\n",
       "       410        3   177\n",
       "       434        3   598\n",
       "       442        4   245\n",
       "       451        2    48\n",
       "       488        4    54\n",
       "       501        4   723\n",
       "       555        2   561\n",
       "       568        4   177\n",
       "       588        3    37\n",
       "       620        1    17\n",
       "...             ...   ...\n",
       "223686 4851       4  1339\n",
       "       4945       4  1339\n",
       "       5358       5  1339\n",
       "       6849       3  1339\n",
       "       8521       2  1339\n",
       "223842 1685       1  1339\n",
       "       2336       4  1335\n",
       "       3135       3  1343\n",
       "       4886       4  1343\n",
       "       5429       3  1343\n",
       "       5643       5  1343\n",
       "       6441       4  1335\n",
       "       8383       4  1343\n",
       "       8400       3  1343\n",
       "       9650       5  1343\n",
       "223969 5358       3  1346\n",
       "       6491       4  1346\n",
       "       8677       4  1346\n",
       "       9364       3  1346\n",
       "       9902       4  1346\n",
       "       11153      3  1346\n",
       "       11205      3  1346\n",
       "       11559      3  1346\n",
       "       12355      4  1346\n",
       "       12681      2  1346\n",
       "       12729      2  1346\n",
       "       12983      1  1346\n",
       "       13000      4  1346\n",
       "       13291      3  1346\n",
       "       13531      4  1346\n",
       "\n",
       "[33177270 rows x 2 columns]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display df; pandas abbreviates the rendering of large frames (head and tail rows only).\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14620\n",
      "157949\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,\n",
       "         11,   12,   13,   14,   15,   16,   17,   18,   19,   20,   21,\n",
       "         22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,\n",
       "         33,   34,   35,   36,   37,   38,   39,   40,   41,   42,   43,\n",
       "         44,   45,   46,   47,   48,   50,   51,   52,   53,   54,   55,\n",
       "         56,   57,   58,   59,   60,   61,   62,   63,   64,   65,   66,\n",
       "         67,   68,   69,   70,   71,   72,   73,   74,   75,   76,   77,\n",
       "         78,   79,   80,   81,   82,   83,   84,   85,   86,   87,   88,\n",
       "         89,   90,   91,   92,   93,   94,   95,   96,   97,  100,  101,\n",
       "        102,  104,  105,  106,  107,  108,  109,  110,  111,  112,  113,\n",
       "        114,  115,  116,  117,  118,  119,  120,  121,  122,  123,  124,\n",
       "        125,  126,  127,  128,  129,  130,  131,  132,  133,  134,  135,\n",
       "        136,  137,  138,  139,  140,  141,  142,  143,  144,  145,  146,\n",
       "        147,  148,  149,  150,  151,  152,  153,  154,  155,  156,  157,\n",
       "        158,  159,  160,  161,  162,  163,  164,  165,  166,  167,  168,\n",
       "        169,  170,  171,  172,  173,  174,  175,  176,  177,  178,  179,\n",
       "        180,  181,  182,  183,  184,  185,  186,  187,  188,  189,  190,\n",
       "        191,  192,  193,  194,  195,  196,  197,  198,  199,  200,  201,\n",
       "        202,  203,  204,  205,  206,  207,  208,  209,  210,  211,  212,\n",
       "        213,  214,  215,  216,  217,  218,  219,  220,  221,  222,  223,\n",
       "        224,  225,  226,  227,  228,  229,  230,  231,  232,  233,  234,\n",
       "        235,  236,  237,  238,  239,  240,  241,  242,  243,  244,  245,\n",
       "        246,  247,  248,  249,  250,  251,  252,  253,  254,  255,  256,\n",
       "        257,  258,  259,  260,  261,  262,  263,  264,  265,  266,  267,\n",
       "        268,  269,  270,  271,  273,  274,  275,  276,  277,  278,  280,\n",
       "        282,  283,  284,  285,  286,  287,  288,  289,  290,  291,  292,\n",
       "        293,  294,  295,  296,  297,  298,  299,  300,  301,  302,  303,\n",
       "        304,  305,  306,  307,  308,  309,  310,  311,  312,  313,  314,\n",
       "        315,  316,  317,  318,  319,  320,  321,  322,  323,  324,  325,\n",
       "        326,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,\n",
       "        337,  338,  339,  340,  341,  342,  343,  344,  345,  346,  347,\n",
       "        348,  349,  350,  351,  352,  353,  354,  356,  357,  358,  359,\n",
       "        360,  361,  362,  364,  365,  366,  367,  368,  369,  370,  371,\n",
       "        372,  373,  374,  375,  376,  377,  378,  379,  380,  381,  382,\n",
       "        383,  384,  385,  386,  387,  388,  389,  390,  391,  392,  393,\n",
       "        394,  395,  396,  397,  398,  399,  400,  401,  402,  403,  404,\n",
       "        405,  406,  407,  408,  409,  410,  411,  412,  413,  414,  415,\n",
       "        416,  417,  418,  419,  420,  421,  422,  423,  424,  425,  426,\n",
       "        427,  428,  429,  430,  432,  433,  434,  435,  436,  437,  438,\n",
       "        439,  440,  442,  443,  444,  445,  446,  447,  448,  449,  450,\n",
       "        451,  452,  453,  454,  455,  456,  457,  458,  459,  460,  461,\n",
       "        462,  463,  464,  465,  466,  467,  468,  469,  471,  472,  474,\n",
       "        475,  476,  477,  478,  479,  480,  481,  482,  483,  484,  485,\n",
       "        486,  487,  488,  489,  490,  491,  492,  493,  494,  495,  496,\n",
       "        497,  498,  499,  500,  501,  502,  503,  504,  505,  506,  507,\n",
       "        508,  509,  510,  511,  512,  513,  514,  515,  516,  517,  519,\n",
       "        521,  522,  523,  524,  526,  528,  529,  530,  531,  532,  533,\n",
       "        534,  535,  536,  537,  538,  539,  540,  541,  542,  543,  544,\n",
       "        545,  546,  547,  548,  550,  551,  552,  553,  554,  555,  556,\n",
       "        557,  558,  559,  560,  561,  562,  563,  564,  565,  566,  567,\n",
       "        568,  569,  570,  571,  572,  573,  574,  575,  576,  577,  578,\n",
       "        579,  580,  581,  582,  583,  584,  585,  586,  587,  588,  589,\n",
       "        590,  591,  592,  593,  595,  596,  597,  598,  600,  601,  602,\n",
       "        603,  604,  605,  606,  607,  608,  609,  611,  612,  613,  614,\n",
       "        616,  617,  618,  619,  620,  621,  622,  623,  624,  625,  626,\n",
       "        627,  628,  630,  631,  632,  633,  634,  635,  636,  637,  638,\n",
       "        639,  640,  641,  642,  643,  644,  645,  646,  647,  648,  650,\n",
       "        651,  653,  654,  655,  656,  657,  658,  659,  660,  661,  662,\n",
       "        663,  664,  665,  666,  667,  669,  670,  672,  673,  674,  675,\n",
       "        676,  678,  679,  680,  681,  682,  683,  684,  685,  686,  687,\n",
       "        688,  689,  690,  691,  692,  693,  695,  696,  697,  698,  699,\n",
       "        700,  701,  702,  703,  704,  706,  707,  708,  709,  710,  711,\n",
       "        712,  713,  714,  715,  716,  717,  718,  719,  720,  721,  722,\n",
       "        723,  724,  725,  726,  728,  729,  730,  731,  732,  733,  734,\n",
       "        735,  736,  737,  738,  739,  740,  741,  742,  743,  744,  745,\n",
       "        747,  748,  749,  750,  751,  752,  753,  754,  755,  756,  757,\n",
       "        758,  759,  760,  761,  762,  763,  764,  765,  766,  767,  768,\n",
       "        769,  770,  771,  772,  773,  774,  775,  776,  778,  779,  780,\n",
       "        781,  782,  783,  784,  785,  786,  787,  788,  789,  790,  791,\n",
       "        792,  793,  794,  795,  796,  797,  798,  799,  800,  801,  802,\n",
       "        803,  804,  805,  806,  807,  808,  809,  810,  811,  812,  813,\n",
       "        814,  815,  816,  817,  818,  819,  820,  821,  822,  823,  824,\n",
       "        825,  826,  827,  828,  829,  830,  831,  832,  833,  834,  835,\n",
       "        836,  837,  838,  839,  840,  841,  842,  843,  844,  845,  846,\n",
       "        847,  848,  849,  850,  851,  852,  853,  854,  855,  856,  857,\n",
       "        858,  859,  860,  861,  862,  863,  864,  865,  866,  867,  868,\n",
       "        869,  870,  871,  873,  874,  875,  876,  877,  878,  879,  880,\n",
       "        881,  882,  883,  884,  885,  886,  887,  888,  889,  890,  891,\n",
       "        892,  893,  894,  895,  896,  897,  898,  899,  900,  901,  902,\n",
       "        903,  904,  906,  907,  908,  909,  910,  911,  912,  913,  914,\n",
       "        915,  917,  918,  919,  920,  921,  922,  923,  924,  925,  926,\n",
       "        927,  928,  929,  931,  932,  933,  934,  935,  936,  937,  938,\n",
       "        939,  940,  941,  942,  943,  944,  945,  946,  947,  948,  949,\n",
       "        950,  951,  952,  953,  954,  955,  957,  958,  959,  960,  961,\n",
       "        962,  963,  964,  965,  966,  967,  968,  969,  970,  971,  972,\n",
       "        973,  974,  975,  976,  977,  978,  979,  981,  982,  983,  984,\n",
       "        985,  986,  987,  988,  989,  990,  991,  992,  993,  995,  996,\n",
       "        997,  998,  999, 1000, 1001, 1003, 1004, 1005, 1006, 1007, 1008,\n",
       "       1009, 1010, 1011, 1013, 1014, 1015, 1017, 1018, 1019, 1020, 1021,\n",
       "       1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032,\n",
       "       1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042], dtype=int64)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect the distinct uid and iid values present in the training table.\n",
    "# .loc replaces the .ix indexer, which was removed in pandas 1.0.\n",
    "ser1 = train_df.loc[:,'uid']\n",
    "ser2 = train_df.loc[:,'iid']\n",
    "uuid = ser1.unique()\n",
    "uiid = ser2.unique()\n",
    "print(len(uiid))  # count of distinct iid values\n",
    "print(len(uuid))  # count of distinct uid values\n",
    "# Show the first 1000 distinct uids, in order of first appearance.\n",
    "uuid[0:1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(1.0, 0),\n",
       " (0.10727978097782273, 1),\n",
       " (0.27695585470349865, 2),\n",
       " (0.39035052477783949, 3),\n",
       " (-0.070536011705854343, 4),\n",
       " (0.43934148129721906, 5),\n",
       " (0.12544671927483625, 6),\n",
       " (-0.13887684846330947, 7),\n",
       " (0.26659558398736394, 8),\n",
       " (0.16042304936577173, 9),\n",
       " (-0.10019588657362391, 10),\n",
       " (0.0, 11),\n",
       " (0.41249993705750948, 12),\n",
       " (0.080708173536912647, 13),\n",
       " (0.26937401188058957, 14),\n",
       " (-0.014617633655117153, 15),\n",
       " (0.061434148397873811, 16),\n",
       " (0.33676023496472834, 17),\n",
       " (0.27743079351674066, 18),\n",
       " (0.40598455363746017, 19),\n",
       " (0.057353933467640443, 20),\n",
       " (0.18752289237539818, 21),\n",
       " (0.39990052387002945, 22),\n",
       " (0.17668545570588978, 23),\n",
       " (0.13226001425322165, 24),\n",
       " (0.32063404272194196, 25),\n",
       " (0.53300179088902611, 26),\n",
       " (0.099244754036827407, 27),\n",
       " (0.2836543144655877, 28),\n",
       " (0.30225166106044571, 29),\n",
       " (0.18630692324771156, 30),\n",
       " (0.31288932387873192, 31),\n",
       " (0.17940516233676676, 32),\n",
       " (0.31318411707450849, 33),\n",
       " (0.0, 34),\n",
       " (-0.020806259464411975, 35),\n",
       " (0.36926479327055822, 36),\n",
       " (0.4045530288139253, 37),\n",
       " (0.30836391277313796, 38),\n",
       " (0.1793047845466397, 39),\n",
       " (0.34776623490564468, 40),\n",
       " (0.29697034794707805, 41),\n",
       " (0.098125184666319906, 42),\n",
       " (0.33993202039320242, 43),\n",
       " (0.0, 44),\n",
       " (0.039477101697586135, 45),\n",
       " (0.44163005530805016, 46),\n",
       " (-0.022450662753346864, 47),\n",
       " (0.11379481553341214, 48),\n",
       " (0.10425720702853739, 50),\n",
       " (0.63960214906683133, 51),\n",
       " (0.14002800840280097, 52),\n",
       " (0.38702965728377758, 53),\n",
       " (0.25788071477756375, 54),\n",
       " (0.48729851918521039, 55),\n",
       " (0.16724840200141816, 56),\n",
       " (0.23398959639633435, 57),\n",
       " (0.046123493781728635, 58),\n",
       " (0.0049038465165567079, 59),\n",
       " (0.32865541954031219, 60),\n",
       " (0.40050785259612842, 61),\n",
       " (0.093626505202176052, 62),\n",
       " (0.17707162371703855, 63),\n",
       " (0.13240583787506852, 64),\n",
       " (0.20965696734438366, 65),\n",
       " (0.12652235035099499, 66),\n",
       " (0.2286002286003429, 67),\n",
       " (0.0, 68),\n",
       " (-0.14285714285714285, 69),\n",
       " (0.39500528745094166, 70),\n",
       " (0.0, 71),\n",
       " (-0.30270275729834062, 72),\n",
       " (0.3150389356204214, 73),\n",
       " (0.35355339059327373, 74),\n",
       " (0.19351756745330248, 75),\n",
       " (-0.056154525245591252, 76),\n",
       " (-0.19779694303230888, 77),\n",
       " (0.13258252147247765, 78),\n",
       " (0.16566307331251642, 79),\n",
       " (0.47422310806400936, 80),\n",
       " (0.23090421314870116, 81),\n",
       " (0.082584013901533401, 82),\n",
       " (0.18579547047776473, 83),\n",
       " (0.083205029433784369, 84),\n",
       " (0.14526698815176106, 85),\n",
       " (0.16933350266692068, 86),\n",
       " (-0.032597143890938006, 87),\n",
       " (0.18652619395099398, 88),\n",
       " (0.21052872564651201, 89),\n",
       " (0.33891719470228249, 90),\n",
       " (0.48112522432468813, 91),\n",
       " (0.11945621868631059, 92),\n",
       " (0.061555434528861426, 93),\n",
       " (0.40115638864801045, 94),\n",
       " (-0.024499797877501263, 95),\n",
       " (-0.16320043678429894, 96),\n",
       " (0.22489899383695006, 97),\n",
       " (0.50300303000356883, 100),\n",
       " (0.21067406495303503, 101),\n",
       " (0.19720265943665385, 102)]"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pearson similarity between uid 0 and each of the first 100 distinct uids,\n",
    "# returned as (similarity, uid) pairs.\n",
    "# .loc replaces the .ix indexer, which was removed in pandas 1.0.\n",
    "ser = train_df.loc[:, 'uid']\n",
    "uuid = ser.unique()\n",
    "[(sim_pearson(df, 0, other_uid), other_uid) for other_uid in uuid[0:100]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.03125\n"
     ]
    }
   ],
   "source": [
    "# Time how long it takes to find the index labels shared by uid 1 and uid 38.\n",
    "import time\n",
    "# perf_counter is a monotonic, high-resolution clock intended for measuring\n",
    "# elapsed time; time.time() can be too coarse for sub-second intervals.\n",
    "on = time.perf_counter()\n",
    "\n",
    "# .loc replaces the .ix indexer, which was removed in pandas 1.0.\n",
    "index1 = df.loc[1].index\n",
    "index2 = df.loc[38].index\n",
    "si = index1.intersection(index2)\n",
    "n = len(si)\n",
    "\n",
    "print(time.perf_counter() - on)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "143"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show n, the number of index labels shared by the two users in the timing cell above.\n",
    "n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
